library(tidyverse)
library(survival)
library(rstan)
library(bayesplot)
set.seed(13960043)
aml package:survival R Documentation
Acute Myelogenous Leukemia survival data
Description:
Survival in patients with Acute Myelogenous Leukemia. The
question at the time was whether the standard course of
chemotherapy should be extended ('maintainance') for additional
cycles.
Usage:
aml
leukemia
Format:
time: survival or censoring time
status: censoring status
x: maintenance chemotherapy given? (factor)
Source:
Rupert G. Miller (1997), _Survival Analysis_. John Wiley & Sons.
ISBN: 0-471-25218-2.
data(leukemia, package = "survival")
leukemia <- as_tibble(leukemia) %>%
mutate(id = seq_len(n())) %>%
select(id, everything())
leukemia
## # A tibble: 23 x 4
## id time status x
## <int> <dbl> <dbl> <fct>
## 1 1 9 1 Maintained
## 2 2 13 1 Maintained
## 3 3 13 0 Maintained
## 4 4 18 1 Maintained
## 5 5 23 1 Maintained
## 6 6 28 0 Maintained
## 7 7 31 1 Maintained
## 8 8 34 1 Maintained
## 9 9 45 0 Maintained
## 10 10 48 1 Maintained
## # … with 13 more rows
Check distribution of event times
leukemia_summary <- leukemia %>%
filter(status == 1) %>%
summarize(n = n(),
mean_time = mean(time),
quantiles = list(quantile(time, probs = seq(from = 0, to = 1, by = 0.2)))) %>%
unnest()
leukemia_summary
## # A tibble: 6 x 3
## n mean_time quantiles
## <int> <dbl> <dbl>
## 1 18 23.1 5
## 2 18 23.1 8.4
## 3 18 23.1 17.
## 4 18 23.1 27.6
## 5 18 23.1 33.6
## 6 18 23.1 48
coxph1 <- coxph(formula = Surv(time, status) ~ as.integer(x == "Maintained"),
data = leukemia,
ties = c("efron","breslow","exact")[1])
summary(coxph1)
## Call:
## coxph(formula = Surv(time, status) ~ as.integer(x == "Maintained"),
## data = leukemia, ties = c("efron", "breslow", "exact")[1])
##
## n= 23, number of events= 18
##
## coef exp(coef) se(coef) z Pr(>|z|)
## as.integer(x == "Maintained") -0.9155 0.4003 0.5119 -1.788 0.0737 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## as.integer(x == "Maintained") 0.4003 2.498 0.1468 1.092
##
## Concordance= 0.619 (se = 0.063 )
## Likelihood ratio test= 3.38 on 1 df, p=0.07
## Wald test = 3.2 on 1 df, p=0.07
## Score (logrank) test = 3.42 on 1 df, p=0.06
## Load and compile
piecewise_ph_model <- rstan::stan_model("./bayesianideas_piecewise2.stan")
piecewise_ph_model
## S4 class stanmodel 'bayesianideas_piecewise2' coded as follows:
## data {
## // Hypeparameters for lambda[1]
## real<lower=0> w1;
## real<lower=0> lambda1_star;
## // Hyperparameter for lambda[k]
## real<lower=0> w;
## real<lower=0> lambda_star;
## // Hyperparameter for beta
## real beta_mean;
## real<lower=0> beta_sd;
## // Number of pieces
## int<lower=0> K;
## // Cutopoints on time
## // cutpoints[1] = 0
## // max(event time) < cutpoints[K+1] < Inf
## // K+1 elements
## real cutpoints[K+1];
## //
## int<lower=0> N;
## int<lower=0,upper=1> cens[N];
## real y[N];
## int<lower=0,upper=1> x[N];
## //
## // grids for evaluating posterior predictions
## int<lower=0> grid_size;
## real grid[grid_size];
## // Whether to evalulate the likelihood in model {}
## // https://twitter.com/JamesBland_Econ/status/1138402318645432320
## int<lower=0,upper=1> eval_likelihood;
## }
##
## transformed data {
## }
##
## parameters {
## // Baseline hazards
## real<lower=0> lambda[K];
## // Effect of group
## real beta;
## }
##
## transformed parameters {
## }
##
## model {
## // Prior on beta
## target += normal_lpdf(beta | beta_mean, beta_sd);
##
## // Loop over pieces of time (intervals).
## for (k in 1:K) {
## // k = 1,2,...,K
## // cutpoints[1] = 0
## // cutpoints[K+1] > max event time
## real length = cutpoints[k+1] - cutpoints[k];
##
## // Prior on lambda
## // BIDA 13.2.5 Priors for lambda
## if (k == 1) {
## // The first interval requires special handling when it is empty by design.
## target += gamma_lpdf(lambda[1] | lambda1_star * length * w1, length * w1);
## } else {
## // Mean lambda_star
## target += gamma_lpdf(lambda[k] | lambda_star * length * w, length * w);
## }
##
## // Likelihood contribution
## // BIDA 13.2.3 Likelihood for piecewise hazard PH model
## if (eval_likelihood == 1) {
## // Only evaluate likelihood if asked.
## for (i in 1:N) {
## // Linear predictor
## real lp = beta * x[i];
## // Everyone will contribute to the survival part.
## if (y[i] >= cutpoints[k+1]) {
## // If surviving beyond the end of the interval,
## // contribute survival throughout the interval.
## target += -exp(lp) * (lambda[k] * length);
## //
## } else if (cutpoints[k] <= y[i] && y[i] < cutpoints[k+1]) {
## // If ending follow up during the interval,
## // contribute survival until the end of follow up.
## target += -exp(lp) * (lambda[k] * (y[i] - cutpoints[k]));
## //
## // Event individuals also contribute to the hazard part.
## if (cens[i] == 1) {
## target += lp + log(lambda[k]);
## }
## } else {
## // If having ended follow up before this interval,
## // no contribution in this interval.
## }
## }
## }
## }
## }
##
## generated quantities {
## // Hazard function evaluated at grid points
## real<lower=0> h0_grid[grid_size];
## // Cumulative hazard function at grid points
## real<lower=0> H0_grid[grid_size];
## // Survival function at grid points
## real<lower=0> S0_grid[grid_size];
## real<lower=0> S1_grid[grid_size];
## // Time zero cumulative hazard should be zero.
## H0_grid[1] = 0;
##
## // Loop over grid points
## for (g in 1:grid_size) {
## // Loop over cutpoints
## for (k in 1:K) {
## // At each k, hazard is constant at lambda[k]
## if (cutpoints[k] <= grid[g] && grid[g] < cutpoints[k+1]) {
## h0_grid[g] = lambda[k];
## break;
## }
## }
## // Set grid points beyond the last time cutoff to zeros.
## if (grid[g] >= cutpoints[K+1]) {
## h0_grid[g] = 0;
## }
## // Cumulative hazard
## if (g > 1) {
## // This double loop is very inefficient.
## // Index starts at 2!
## for (gg in 2:g) {
## // Width between current grid points
## real width = grid[gg] - grid[gg-1];
## // Width x hazard value at first grid point.
## // This is approximation and is incorrect for grid points
## // between which the hazard changes.
## // Previous cumulative + current contribution.
## H0_grid[g] = H0_grid[g-1] + (width * h0_grid[gg-1]);
## }
## }
## // Survival
## S0_grid[g] = exp(-H0_grid[g]);
## S1_grid[g] = S0_grid[g]^exp(beta);
## }
## }
## Cutpoints every 20% of events
cutpoints_20 <- as.numeric(leukemia_summary$quantiles)
## First cutpoint should be time 0.
cutpoints_20[1] <- 0
## Last cutpoint should be larger than the maximum failure time.
cutpoints_20[length(cutpoints_20)] <- cutpoints_20[length(cutpoints_20)] + 1
## Show
cutpoints_20
## [1] 0.0 8.4 17.0 27.6 33.6 49.0
## Entire time as a single interval for exponential model
cutpoints_100 <- c(0, max(cutpoints_20))
cutpoints_100
## [1] 0 49
## All unique event times
cutpoints_all <- unique(sort(leukemia$time[leukemia$status == 1]))
cutpoints_all <- c(0, cutpoints_all, max(cutpoints_all)+1)
cutpoints_all
## [1] 0 5 8 9 12 13 18 23 27 30 31 33 34 43 45 48 49
## Evaluation grid for survival function
grid <- seq(from = 0, to = max(leukemia_summary$quantiles), by = 0.1)
exponential_prior_sample <-
rstan::sampling(object = piecewise_ph_model,
data = list(w1 = 10^4,
lambda1_star = 0.01,
w = 0.01,
lambda_star = 0.05,
beta_mean = 0,
beta_sd = 10,
K = length(cutpoints_100) - 1,
cutpoints = cutpoints_100,
N = length(leukemia$time),
cens = leukemia$status,
y = leukemia$time,
x = as.integer(leukemia$x == "Maintained"),
grid_size = length(grid),
grid = grid,
## Do not use likelihood
eval_likelihood = 0))
check_hmc_diagnostics(exponential_prior_sample)
##
## Divergences:
##
## Tree depth:
##
## Energy:
print(exponential_prior_sample, pars = c("lambda","beta","lp__"))
## Inference for Stan model: bayesianideas_piecewise2.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## lambda[1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 3006 1
## beta -0.06 0.17 9.86 -19.32 -6.68 -0.20 6.67 18.87 3423 1
## lp__ -0.88 0.02 0.98 -3.55 -1.27 -0.58 -0.18 0.08 1760 1
##
## Samples were drawn using NUTS(diag_e) at Sun Jun 30 17:59:39 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
traceplot(exponential_prior_sample, inc_warmup = TRUE, pars = c("lambda","beta","lp__"))
traceplot(exponential_prior_sample, inc_warmup = FALSE, pars = c("lambda","beta","lp__"))
bayesplot::mcmc_rank_overlay(exponential_prior_sample, regex_pars = c("lambda","beta","lp__"))
Baseline hazard function.
exponential_prior_sample %>%
as.matrix(pars = "h0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Baseline cumulative hazard function.
exponential_prior_sample %>%
as.matrix(pars = "H0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Baseline survival estimate for the Nonmaintained group.
exponential_prior_sample %>%
as.matrix(pars = "S0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Survival estimate for the Maintained group.
exponential_prior_sample %>%
as.matrix(pars = "S1_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
exponential_sample <-
rstan::sampling(object = piecewise_ph_model,
data = list(w1 = 10^4,
lambda1_star = 0.01,
w = 0.01,
lambda_star = 0.05,
beta_mean = 0,
beta_sd = 10,
K = length(cutpoints_100) - 1,
cutpoints = cutpoints_100,
N = length(leukemia$time),
cens = leukemia$status,
y = leukemia$time,
x = as.integer(leukemia$x == "Maintained"),
grid_size = length(grid),
grid = grid,
## Do use likelihood
eval_likelihood = 1))
check_hmc_diagnostics(exponential_sample)
##
## Divergences:
##
## Tree depth:
##
## Energy:
print(exponential_sample, pars = c("lambda","beta","lp__"))
## Inference for Stan model: bayesianideas_piecewise2.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## lambda[1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 3344 1
## beta 0.74 0.01 0.39 -0.07 0.50 0.77 1.02 1.43 2674 1
## lp__ -87.63 0.02 0.96 -90.11 -88.02 -87.34 -86.93 -86.68 1796 1
##
## Samples were drawn using NUTS(diag_e) at Sun Jun 30 18:00:08 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
traceplot(exponential_sample, inc_warmup = TRUE, pars = c("lambda","beta","lp__"))
traceplot(exponential_sample, inc_warmup = FALSE, pars = c("lambda","beta","lp__"))
bayesplot::mcmc_rank_overlay(exponential_sample, regex_pars = c("lambda","beta","lp__"))
Baseline hazard function.
exponential_sample %>%
as.matrix(pars = "h0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Baseline cumulative hazard function.
exponential_sample %>%
as.matrix(pars = "H0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Baseline survival estimate for the Nonmaintained group.
exponential_sample %>%
as.matrix(pars = "S0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Survival estimate for the Maintained group.
exponential_sample %>%
as.matrix(pars = "S1_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
piecewise_ph_prior_sample <-
rstan::sampling(object = piecewise_ph_model,
data = list(w1 = 10^4,
lambda1_star = 0.01,
w = 0.01,
lambda_star = 0.05,
beta_mean = 0,
beta_sd = 10,
K = length(cutpoints_20) - 1,
cutpoints = cutpoints_20,
N = length(leukemia$time),
cens = leukemia$status,
y = leukemia$time,
x = as.integer(leukemia$x == "Maintained"),
grid_size = length(grid),
grid = grid,
## Do not use likelihood
eval_likelihood = 0))
## Warning: There were 3153 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
check_hmc_diagnostics(piecewise_ph_prior_sample)
##
## Divergences:
##
## Tree depth:
##
## Energy:
print(piecewise_ph_prior_sample, pars = c("lambda","beta","lp__"))
## Inference for Stan model: bayesianideas_piecewise2.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## lambda[1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 248 1.02
## lambda[2] 0.03 0.01 0.38 0.00 0.00 0.00 0.00 0.03 2243 1.00
## lambda[3] 0.02 0.00 0.23 0.00 0.00 0.00 0.00 0.03 3221 1.00
## lambda[4] 0.01 0.00 0.19 0.00 0.00 0.00 0.00 0.00 3319 1.00
## lambda[5] 0.05 0.02 0.64 0.00 0.00 0.00 0.00 0.12 676 1.00
## beta 0.96 0.69 10.06 -19.61 -5.82 1.24 7.80 19.92 213 1.02
## lp__ -26.51 0.16 1.82 -30.70 -27.67 -26.20 -25.14 -23.66 135 1.02
##
## Samples were drawn using NUTS(diag_e) at Sun Jun 30 18:00:39 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
traceplot(piecewise_ph_prior_sample, inc_warmup = TRUE, pars = c("lambda","beta","lp__"))
traceplot(piecewise_ph_prior_sample, inc_warmup = FALSE, pars = c("lambda","beta","lp__"))
bayesplot::mcmc_rank_overlay(piecewise_ph_prior_sample, regex_pars = c("lambda","beta","lp__"))
Baseline hazard function.
piecewise_ph_prior_sample %>%
as.matrix(pars = "h0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Baseline cumulative hazard function.
piecewise_ph_prior_sample %>%
as.matrix(pars = "H0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Baseline survival estimate for the Nonmaintained group.
piecewise_ph_prior_sample %>%
as.matrix(pars = "S0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Survival estimate for the Maintained group.
piecewise_ph_prior_sample %>%
as.matrix(pars = "S1_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
piecewise_ph_sample <-
rstan::sampling(object = piecewise_ph_model,
data = list(w1 = 10^4,
lambda1_star = 0.01,
w = 0.01,
lambda_star = 0.05,
beta_mean = 0,
beta_sd = 10,
K = length(cutpoints_20) - 1,
cutpoints = cutpoints_20,
N = length(leukemia$time),
cens = leukemia$status,
y = leukemia$time,
x = as.integer(leukemia$x == "Maintained"),
grid_size = length(grid),
grid = grid,
## Do use likelihood
eval_likelihood = 1))
check_hmc_diagnostics(piecewise_ph_sample)
##
## Divergences:
##
## Tree depth:
##
## Energy:
print(piecewise_ph_sample, pars = c("lambda","beta","lp__"))
## Inference for Stan model: bayesianideas_piecewise2.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## lambda[1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 4691 1
## lambda[2] 0.03 0.00 0.02 0.01 0.02 0.02 0.04 0.07 4935 1
## lambda[3] 0.04 0.00 0.02 0.01 0.02 0.04 0.05 0.09 4095 1
## lambda[4] 0.08 0.00 0.05 0.02 0.05 0.07 0.11 0.21 4152 1
## lambda[5] 0.09 0.00 0.05 0.02 0.05 0.08 0.12 0.22 4060 1
## beta -0.55 0.01 0.52 -1.62 -0.89 -0.54 -0.21 0.45 3077 1
## lp__ -103.84 0.04 1.85 -108.36 -104.81 -103.49 -102.48 -101.32 1713 1
##
## Samples were drawn using NUTS(diag_e) at Sun Jun 30 18:01:11 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
traceplot(piecewise_ph_sample, inc_warmup = TRUE, pars = c("lambda","beta","lp__"))
traceplot(piecewise_ph_sample, inc_warmup = FALSE, pars = c("lambda","beta","lp__"))
bayesplot::mcmc_rank_overlay(piecewise_ph_sample, regex_pars = c("lambda","beta","lp__"))
Baseline hazard function.
piecewise_ph_sample %>%
as.matrix(pars = "h0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Baseline cumulative hazard function.
piecewise_ph_sample %>%
as.matrix(pars = "H0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Baseline survival estimate for the Nonmaintained group.
piecewise_ph_sample %>%
as.matrix(pars = "S0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Survival estimate for the Maintained group.
piecewise_ph_sample %>%
as.matrix(pars = "S1_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
piecewise_ph_all_prior_sample <-
rstan::sampling(object = piecewise_ph_model,
data = list(w1 = 10^4,
lambda1_star = 0.01,
w = 0.01,
lambda_star = 0.05,
beta_mean = 0,
beta_sd = 10,
K = length(cutpoints_all) - 1,
cutpoints = cutpoints_all,
N = length(leukemia$time),
cens = leukemia$status,
y = leukemia$time,
x = as.integer(leukemia$x == "Maintained"),
grid_size = length(grid),
grid = grid,
eval_likelihood = 1))
check_hmc_diagnostics(piecewise_ph_all_prior_sample)
##
## Divergences:
##
## Tree depth:
##
## Energy:
print(piecewise_ph_all_prior_sample, pars = c("lambda","beta","lp__"))
## Inference for Stan model: bayesianideas_piecewise2.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## lambda[1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 3824 1
## lambda[2] 0.05 0.00 0.03 0.01 0.02 0.04 0.07 0.14 4472 1
## lambda[3] 0.17 0.00 0.12 0.02 0.08 0.15 0.23 0.48 4373 1
## lambda[4] 0.03 0.00 0.03 0.00 0.01 0.02 0.04 0.11 5500 1
## lambda[5] 0.10 0.00 0.10 0.00 0.03 0.07 0.13 0.37 5127 1
## lambda[6] 0.02 0.00 0.02 0.00 0.01 0.01 0.03 0.09 4521 1
## lambda[7] 0.02 0.00 0.02 0.00 0.01 0.02 0.03 0.09 5094 1
## lambda[8] 0.07 0.00 0.05 0.01 0.03 0.06 0.10 0.20 4042 1
## lambda[9] 0.06 0.00 0.06 0.00 0.02 0.04 0.08 0.24 4598 1
## lambda[10] 0.22 0.00 0.23 0.01 0.06 0.15 0.30 0.85 4857 1
## lambda[11] 0.12 0.00 0.12 0.00 0.03 0.08 0.16 0.45 5131 1
## lambda[12] 0.31 0.00 0.32 0.01 0.08 0.20 0.42 1.15 5138 1
## lambda[13] 0.04 0.00 0.04 0.00 0.01 0.03 0.05 0.14 5013 1
## lambda[14] 0.26 0.00 0.29 0.00 0.07 0.17 0.36 1.06 4600 1
## lambda[15] 0.62 0.01 0.75 0.01 0.16 0.38 0.78 2.79 3509 1
## lambda[16] 3.79 0.09 4.98 0.08 0.89 2.13 4.82 17.22 3319 1
## beta -1.22 0.01 0.54 -2.39 -1.57 -1.21 -0.86 -0.19 2564 1
## lp__ -178.94 0.09 3.25 -186.21 -180.86 -178.64 -176.60 -173.60 1352 1
##
## Samples were drawn using NUTS(diag_e) at Sun Jun 30 18:01:43 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
traceplot(piecewise_ph_all_prior_sample, inc_warmup = TRUE, pars = c("lambda","beta","lp__"))
traceplot(piecewise_ph_all_prior_sample, inc_warmup = FALSE, pars = c("lambda","beta","lp__"))
bayesplot::mcmc_rank_overlay(piecewise_ph_all_prior_sample, regex_pars = c("lambda","beta","lp__"))
Baseline hazard function.
piecewise_ph_all_prior_sample %>%
as.matrix(pars = "h0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Baseline cumulative hazard function.
piecewise_ph_all_prior_sample %>%
as.matrix(pars = "H0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Baseline survival estimate for the Nonmaintained group.
piecewise_ph_all_prior_sample %>%
as.matrix(pars = "S0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Survival estimate for the Maintained group.
piecewise_ph_all_prior_sample %>%
as.matrix(pars = "S1_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
piecewise_ph_all_sample <-
rstan::sampling(object = piecewise_ph_model,
data = list(w1 = 10^4,
lambda1_star = 0.01,
w = 0.01,
lambda_star = 0.05,
beta_mean = 0,
beta_sd = 10,
K = length(cutpoints_all) - 1,
cutpoints = cutpoints_all,
N = length(leukemia$time),
cens = leukemia$status,
y = leukemia$time,
x = as.integer(leukemia$x == "Maintained"),
grid_size = length(grid),
grid = grid,
eval_likelihood = 1))
check_hmc_diagnostics(piecewise_ph_all_sample)
##
## Divergences:
##
## Tree depth:
##
## Energy:
print(piecewise_ph_all_sample, pars = c("lambda","beta","lp__"))
## Inference for Stan model: bayesianideas_piecewise2.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## lambda[1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 5010 1
## lambda[2] 0.05 0.00 0.04 0.01 0.02 0.04 0.07 0.14 4829 1
## lambda[3] 0.18 0.00 0.13 0.02 0.08 0.14 0.24 0.51 4633 1
## lambda[4] 0.03 0.00 0.03 0.00 0.01 0.02 0.04 0.10 4920 1
## lambda[5] 0.10 0.00 0.11 0.00 0.03 0.06 0.14 0.39 4742 1
## lambda[6] 0.02 0.00 0.02 0.00 0.01 0.02 0.03 0.08 5319 1
## lambda[7] 0.02 0.00 0.02 0.00 0.01 0.02 0.03 0.09 5132 1
## lambda[8] 0.07 0.00 0.05 0.01 0.03 0.06 0.10 0.21 5563 1
## lambda[9] 0.06 0.00 0.06 0.00 0.02 0.04 0.08 0.22 4698 1
## lambda[10] 0.21 0.00 0.22 0.00 0.06 0.15 0.30 0.80 4958 1
## lambda[11] 0.12 0.00 0.12 0.00 0.03 0.08 0.16 0.45 5049 1
## lambda[12] 0.30 0.00 0.31 0.01 0.09 0.20 0.41 1.16 4658 1
## lambda[13] 0.04 0.00 0.04 0.00 0.01 0.03 0.05 0.14 4221 1
## lambda[14] 0.26 0.00 0.28 0.01 0.07 0.17 0.36 1.02 4808 1
## lambda[15] 0.62 0.01 0.80 0.01 0.14 0.36 0.79 2.80 3412 1
## lambda[16] 3.66 0.08 4.56 0.10 0.86 2.19 4.71 15.34 3541 1
## beta -1.22 0.01 0.54 -2.31 -1.57 -1.21 -0.85 -0.20 2936 1
## lp__ -179.08 0.09 3.29 -186.49 -181.06 -178.65 -176.79 -173.60 1210 1
##
## Samples were drawn using NUTS(diag_e) at Sun Jun 30 18:02:24 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
traceplot(piecewise_ph_all_sample, inc_warmup = TRUE, pars = c("lambda","beta","lp__"))
traceplot(piecewise_ph_all_sample, inc_warmup = FALSE, pars = c("lambda","beta","lp__"))
bayesplot::mcmc_rank_overlay(piecewise_ph_all_sample, regex_pars = c("lambda","beta","lp__"))
Baseline hazard function.
piecewise_ph_all_sample %>%
as.matrix(pars = "h0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Baseline cumulative hazard function.
piecewise_ph_all_sample %>%
as.matrix(pars = "H0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Baseline survival estimate for the Nonmaintained group.
piecewise_ph_all_sample %>%
as.matrix(pars = "S0_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Survival estimate for the Maintained group.
piecewise_ph_all_sample %>%
as.matrix(pars = "S1_grid") %>%
as_tibble() %>%
`names<-`(as.character(grid)) %>%
mutate(iter = seq_len(n())) %>%
gather(key = time, value = value, -iter) %>%
mutate(time = as.numeric(time)) %>%
filter(iter %in% sample(1:max(iter), size = 500)) %>%
##
ggplot(mapping = aes(x = time, y = value, group = iter)) +
geom_line(alpha = 0.1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
print(sessionInfo())
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] bayesplot_1.7.0 rstan_2.18.2 StanHeaders_2.18.1-10 survival_2.44-1.1 forcats_0.4.0
## [6] stringr_1.4.0 dplyr_0.8.1 purrr_0.3.2 readr_1.3.1 tidyr_0.8.3
## [11] tibble_2.1.3 ggplot2_3.2.0 tidyverse_1.2.1 doRNG_1.7.1 rngtools_1.3.1.1
## [16] pkgmaker_0.27 registry_0.5-1 doParallel_1.0.14 iterators_1.0.10 foreach_1.4.4
## [21] knitr_1.23
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.0 jsonlite_1.6 splines_3.6.0 modelr_0.1.4 assertthat_0.2.1
## [6] stats4_3.6.0 cellranger_1.1.0 yaml_2.2.0 pillar_1.4.1 backports_1.1.4
## [11] lattice_0.20-38 glue_1.3.1 digest_0.6.19 rvest_0.3.4 colorspace_1.4-1
## [16] htmltools_0.3.6 Matrix_1.2-17 plyr_1.8.4 pkgconfig_2.0.2 bibtex_0.4.2
## [21] broom_0.5.2 haven_2.1.0 xtable_1.8-4 scales_1.0.0 processx_3.3.1
## [26] generics_0.0.2 withr_2.1.2 lazyeval_0.2.2 cli_1.1.0 magrittr_1.5
## [31] crayon_1.3.4 readxl_1.3.1 evaluate_0.14 ps_1.3.0 fansi_0.4.0
## [36] nlme_3.1-140 xml2_1.2.0 pkgbuild_1.0.3 tools_3.6.0 loo_2.1.0
## [41] prettyunits_1.0.2 hms_0.4.2 matrixStats_0.54.0 munsell_0.5.0 callr_3.2.0
## [46] compiler_3.6.0 rlang_0.4.0 grid_3.6.0 ggridges_0.5.1 rstudioapi_0.10
## [51] labeling_0.3 rmarkdown_1.13 gtable_0.3.0 codetools_0.2-16 inline_0.3.15
## [56] reshape2_1.4.3 R6_2.4.0 gridExtra_2.3 lubridate_1.7.4 zeallot_0.1.0
## [61] utf8_1.1.4 stringi_1.4.3 Rcpp_1.0.1 vctrs_0.1.0 tidyselect_0.2.5
## [66] xfun_0.8
## Record execution time and multicore use
end_time <- Sys.time()
diff_time <- difftime(end_time, start_time, units = "auto")
cat("Started ", as.character(start_time), "\n",
"Finished ", as.character(end_time), "\n",
"Time difference of ", diff_time, " ", attr(diff_time, "units"), "\n",
"Used ", foreach::getDoParWorkers(), " cores\n",
"Used ", foreach::getDoParName(), " as backend\n",
sep = "")
## Started 2019-06-30 17:58:55
## Finished 2019-06-30 18:02:58
## Time difference of 4.04374 mins
## Used 12 cores
## Used doParallelMC as backend